# SAT score mean and standard deviation by family-income decile

library(tidyverse)


# Load the merged data set, keep only rows whose FAMILY_INCOME is above 10000
# (filter() also discards rows where that comparison is NA), and expose the
# income under the short column name `fi` used by the decile assignment below.
merged_df <- read.csv("merged_final.csv") %>%
  filter(FAMILY_INCOME > 10000) %>%
  mutate(fi = FAMILY_INCOME)

# The nine interior cut points (10th, 20th, ..., 90th percentiles) of family
# income that separate the ten income deciles.
fi_cuts <- quantile(
  merged_df$FAMILY_INCOME,
  probs = seq_len(9) / 10,
  na.rm = TRUE
)



# Assign every row its income decile (1 = lowest, 10 = highest).
# A row's decile is one more than the number of cut points lying strictly
# below its income: an income equal to a cut point stays in the lower decile,
# and a missing income yields a missing decile.
merged_df <- merged_df %>%
  mutate(percentile = 1 + rowSums(outer(fi, fi_cuts, ">")))

# Print the mean and standard deviation of RSAT_TOTAL_SCORE for each income
# decile, starting with the highest decile (10) and ending with the lowest (1).
for (decile in 10:1) {
  # which() keeps only rows that are definitely in this decile; a logical
  # row index would turn rows with an NA decile into all-NA rows.
  in_decile <- which(merged_df$percentile == decile)
  scores <- merged_df$RSAT_TOTAL_SCORE[in_decile]

  # na.rm = TRUE: a single missing score would otherwise make the whole
  # decile's statistics NA. The local names deliberately avoid masking
  # base::var() and base::sd().
  score_mean <- mean(scores, na.rm = TRUE)
  score_sd <- sd(scores, na.rm = TRUE)

  print(paste("RHI Decile ", decile, ": ", score_mean, score_sd))
}
